/* -------------------------------------------------------------------
			    Problem set #5

		  Source code for two layer network
------------------------------------------------------------------- */

#include <stdlib.h>

#include "g.hh"
#include "network.hh"

// Set up two weight matrices, one which will multiply the inputs and
// the other which will multiply the hidden units.  Even though they
// start off as 0x0, they are resized by initialize() to be
// input_weights: (hidden_units) by (input_units + 1) 
// hidden_weights: (output_units) by (hidden_units + 1)
// The '+ 1' is because a bias unit should be added to both the input
// layer and the hidden layer.
static matrix input_weights(0, 0);
static matrix hidden_weights(0, 0);

// Set up some useful variables which will be set to the number of
// input units, hidden units, output units and the total number of
// weights.  Note that input_units and hidden_units do not count their
// bias units.
static int input_units = 0;
static int hidden_units = 0;
static int output_units = 0;
int total_weights = 0;

// Initialize weight matrices, must be called in main()
// PROVIDED
void initialize(int inputs, int outputs)
{
  input_units = inputs;
  output_units = outputs;

  read_parameter("hiddens", hidden_units, 0);
  if (hidden_units == 0) {
    cerr << "ERROR: Number of hidden units not specified in parameters file\n";
  }

  input_weights.resize(hidden_units, input_units + 1);
  hidden_weights.resize(output_units, hidden_units + 1);

  total_weights = input_weights.rows() * input_weights.columns()
    + hidden_weights.rows() * hidden_weights.columns();
}

// Randomize weight matices, should be called in main()
// YOU WRITE
void randomize_weights(double range)
{
  // Your code here
  // Set elements of input_weights and hidden_weights to random numbers
}

// Calculate partial derviative of output with respect to weights
// Weights represented as one big vector
// YOU WRITE
matrix model_gradient(vector& input_data)
{
  // Your code here
}

// Calculate output of model given input
// YOU WRITE
vector model_output(vector& input_data)
{
  // Your code here
}

// Increment the weights by delta_w, which is encoded in the vector
// representation of the weights
// YOU WRITE
void increment_weights(vector& delta_w)
{
  // Your code here
}

// Print out the details of the model
// PROVIDED
void print_model()
{
  cout << "Two layer sigmoid network\n";
  cout << "Input units (not counting bias unit): " << input_units << "\n";
  cout << "Hidden units (not counting bias unit): " << hidden_units << "\n";
  cout << "Output units: " << output_units << "\n\n";

  for (int i = 0; i < output_units; i++)
    cout << "Output unit #" << (i + 1) << " = "
	 << "g([" << hidden_weights[i].to_string() << "]^T * [hiddens, 1])\n";
  
  cout << "\n";

  for (i = 0; i < hidden_units; i++)
    cout << "Hidden unit #" << (i + 1) << " = "
	 << "g([" << input_weights[i].to_string() << "]^T * [inputs, 1])\n";

  cout << "\n";
}

// For two layer net only, return the value of the hidden units
// PROVIDED
vector hidden_output(vector& input_data)
{
  return g(input_weights * augment(input_data));
}